/*******************************************************************************
* Long-term effects of weather-induced migration on urban labor and housing 
  markets
* Busso & Chauvin

* Purpose: Building Shift-share instrument

*******************************************************************************/

/*******************************************************************************
* ENVIRONMENT
*******************************************************************************/
/* Geographic units */
* Identifier of the origin unit (rural origin side of the OD matrix)
global geoorig "mca8010"

* Identifier of the destination unit; also serves as the rural/urban location flag
global geodest "mcarp8010"

/* Full path of the geographic codes file merged onto the census data */
global geocodefile "${db_inp}/geo_codes_sat_cities.dta"

/* Within-city migration switch.
   Empty (default): flows whose origin and destination codes coincide are dropped.
   Any non-empty value: those flows are kept. Set it ONLY if you want to keep
   within-cities migration. */
global w ""

/*******************************************************************************
* OD MATRIX - SHARE OF RECENT MIGRATION BEFORE THE 1991 CENSUS
*******************************************************************************/
/* Recent migration data */
* Unzip and load the 1991 Census extract.
* -replace- lets the script be rerun when a previous run stopped before the
* unzipped .dta was deleted.
cd "${census_harm}"
unzipfile Census_91, replace

* -use- takes -clear- (not -replace-) to discard the data in memory
use muni mgprevmuni mgyrsmuni wtper using "Census_91", clear

* Attach the destination unit of the municipality of residence
merge m:1 muni using "${geocodefile}", nogen keepusing(${geodest}) ///
	keep(match)
rename ${geodest} ${geodest}_destination

* Remove non-arranjo destinations (code 1000000)
drop if ${geodest}_destination == 1000000

* Keep only records with a previous municipality, and make it the merge key
drop muni
drop if mgprevmuni == .
rename mgprevmuni muni

* Attach the origin-side codes of the previous municipality
merge m:1 muni using "${geocodefile}", nogen keepusing(${geodest} ${geoorig}) ///
	keep(match)
rename ${geodest} ${geodest}_ori
rename ${geoorig} ${geoorig}_ori

/* Drop within-cities migrants unless global w is set */
if "$w" == "" drop if ${geodest}_ori == ${geodest}_destination

* Migrant indicator: 1 when the origin is rural (code 1000000), 0 otherwise
gen mit_rur = (${geodest}_ori == 1000000)

* Keep recent migrants only (mgyrsmuni of at most 9; missing values fail the
* comparison and are dropped as well)
keep if mgyrsmuni <= 9

* Weighted count of rural-origin migrants for each origin-destination pair
collapse (sum) mit_rur [pw = wtper], by(${geoorig}_ori ${geodest}_destination)

rename ${geoorig}_ori ${geoorig}
rename ${geodest}_destination ${geodest}

/* Reshape to wide: one row per destination, one mit_rur* column per origin */
qui fastreshape wide mit_rur, i(${geodest}) j(${geoorig})

* Total recent rural-origin migrants per destination
egen mig_total = rowtotal(mit_rur*)

* Turn every origin column into its share of the destination total.
* The same mit_rur* list feeds both the total and the shares, so no origin code
* is left as a raw count and no prefix-specific pattern can fail to match.
quietly ds mit_rur*
global mig `r(varlist)'	// kept as a global, as before, in case other code reads it

foreach share_var of varlist mit_rur* {
	quietly replace `share_var' = `share_var'/mig_total
}

drop mig_total

/* Return data to long: one row per destination-origin pair */
qui fastreshape long mit_rur, i(${geodest}) j(${geoorig}_r)

/* Rename variables for clarity */
rename mit_rur mig_share_t_rur
rename ${geoorig}_r ${geoorig}

keep ${geoorig} ${geodest} mig_share_t_rur

save "${db_tmp}/od_matrices", replace

/*******************************************************************************
* SHIFT-SHARE INSTRUMENT
*******************************************************************************/
* Load the drought index data.
* -use- takes -clear- (not -replace-) to discard the data in memory.
use "${db_tmp}/drought_index_spei8010.dta", clear

keep ${geoorig} drought_dws0009

* Attach every destination's migration share for each origin
merge 1:m ${geoorig} using "${db_tmp}/od_matrices", nogen keep(match)

* Absolute value of the drought measure
gen abs_spei = abs(drought_dws0009)

/* Shift-share terms: origin drought measure times the origin's migration share */
gen shift_0009_dws_t_rur = drought_dws0009*mig_share_t_rur

gen shift_0009_dws_t_rur_abs = abs_spei*mig_share_t_rur

* Sum the terms over origins to obtain one value per destination
collapse (sum) shift_*, by(${geodest})

* Rename the destination identifier for merging with the immigration shares dataset
rename ${geodest} ${geodest}_destination

save "${db_tmp}/shift_share", replace

/*******************************************************************************
* SHIFT-SHARE CONTROLS
*******************************************************************************/
* Load the drought index data from past decades.
* -use- takes -clear- (not -replace-) to discard the data in memory.
use "${db_tmp}/drought_controls_spei8010.dta", clear

keep ${geoorig} drought_dws8190

* Attach every destination's migration share for each origin
merge 1:m ${geoorig} using "${db_tmp}/od_matrices", nogen keep(match)

/* Shift-share control: past drought measure times the origin's migration share */
gen shift_8190_dws_t_rur = drought_dws8190*mig_share_t_rur

* Sum the terms over origins to obtain one value per destination
collapse (sum) shift_*, by(${geodest})

* Rename the destination identifier for merging with the immigration shares dataset
rename ${geodest} ${geodest}_destination

save "${db_tmp}/shift_share_drought_controls", replace

/*******************************************************************************
* 1991 CENSUS - TOTAL POPULATION (PER MCA)
*******************************************************************************/
/* Weighted 1991 Census population per destination unit; used later as the
   denominator of the immigration rate */
use wtper muni using "$census_harm/Census_91", clear
merge m:1 muni using "${geocodefile}", nogen keepusing(${geodest}) ///
	keep(match)

* Each record counts as one person before weighting
gen pop_all = 1

* Weighted population total by destination unit
collapse (sum) pop_all [pw = wtper], by(${geodest})
rename ${geodest} ${geodest}_destination

tempfile base_pop_immg
save `base_pop_immg'

* Delete the unzipped census file. The path is explicit, matching the -use-
* above, so the cleanup does not depend on the current working directory.
erase "${census_harm}/Census_91.dta"

/*******************************************************************************
* 2010 CENSUS - DATA TIDYING
*******************************************************************************/
* Unzip the 2010 Census extract.
* -replace- lets the script be rerun when a previous run stopped before the
* unzipped .dta was deleted.
cd "${census_harm}"
unzipfile Census_10, replace

/* 2010 Census data - cleaning */
use muni wtper mgprevmuni age mgyrsmuni using "Census_10", clear

* Keep recent migrants only: mgyrsmuni of at most 9 and a known previous
* municipality (a missing mgyrsmuni compares as > 9 and is dropped too)
drop if mgyrsmuni > 9 | mgprevmuni == .

* Keep migrants aged 15-64 at the time of the move (age minus mgyrsmuni)
gen age_mig = age - mgyrsmuni
drop if age_mig < 15 | age_mig > 64
drop age_mig

* Set the current municipality aside and use the ORIGIN municipality as merge key
rename muni muni10
rename mgprevmuni muni

* Merge ORIGIN with the geographic units data
merge m:1 muni using "${geocodefile}", nogen keepusing(${geodest} ${geoorig}) ///
	keep(match)
drop muni

* Suffix the origin-side codes with _origin
rename ${geoorig}  ${geoorig}_origin
label var  ${geoorig}_origin " ${geoorig} of origin of migrants, 2010 Census"
rename ${geodest} ${geodest}_origin
label var ${geodest}_origin "${geodest} of origin of migrants, 2010 Census"

* Restore the DESTINATION municipality as merge key
rename muni10 muni

* Merge DESTINATION with the geographic units data
merge m:1 muni using "${geocodefile}", nogen keepusing(${geoorig} ${geodest}) ///
	keep(match)

* Suffix the destination-side codes with _destination
rename ${geoorig} ${geoorig}_destination
rename ${geodest} ${geodest}_destination

/* 2001-2010 migration */
* Flag rural-urban migrants: origin code 1000000, destination code different from it
gen mig_10_rural = (${geodest}_origin == 1000000 & ${geodest}_destination != 1000000)

/* Drop within-city migration unless global w is set */
if "$w" == "" drop if ${geodest}_origin == ${geodest}_destination

/* Weighted count of rural-urban migrants per destination */
collapse (sum) mig_10_rural [pw = wtper], by(${geodest}_destination)

* Save data
tempfile 2010migration
save `2010migration'

* Delete the unzipped census file; the explicit path keeps the cleanup
* independent of the current working directory
erase "${census_harm}/Census_10.dta"

/*******************************************************************************
*  MERGE MIGRATION - 1991 AND 2010 CENSUS
*******************************************************************************/
/* Combine the 2010 migrant counts with the 1991 population totals */
use `2010migration', clear
merge 1:1 ${geodest}_destination using `base_pop_immg', nogenerate

* Discard the row for code 1000000
drop if ${geodest}_destination == 1000000

/* Immigration rate in logs */
* ln(rural-urban 2001-2010 arrivals) minus ln(1991 population) of the destination
replace mig_10_rural = ln(mig_10_rural) - ln(pop_all)

* Keep the identifier and the rate only, with the identifier under its base name
keep ${geodest}_destination mig_10_rural
rename ${geodest}_destination ${geodest}

/* Dataset used by the main specification */
save "${db_tmp}/immig_ibge", replace

